{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import pickle\n",
    "\n",
    "%config Completer.use_jedi = False\n",
    "\n",
    "joinpath = os.path.join"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>filename</th>\n",
       "      <th>granularity</th>\n",
       "      <th>dims</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>citytemp</td>\n",
       "      <td>city_temperature_2020.csv</td>\n",
       "      <td>city</td>\n",
       "      <td>AvgTemperature</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>test</td>\n",
       "      <td>test</td>\n",
       "      <td>test</td>\n",
       "      <td>test</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id      name                   filename granularity            dims\n",
       "0   1  citytemp  city_temperature_2020.csv        city  AvgTemperature\n",
       "1   2      test                       test        test            test"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rawdatainfo = pd.read_csv('datamanager/overview_rawdata.csv', sep=';')\n",
    "rawdatainfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '../data/computed_data/country_codes.pickle'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-5-2d38a39f4b80>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mfile\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'../data/computed_data/country_codes.pickle'\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'rb'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m     \u001b[0munloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0munloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mccode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/computed_data/country_codes.pickle'"
     ]
    }
   ],
   "source": [
    "file = '../data/computed_data/country_codes.pickle'\n",
    "with open(file, 'rb') as f:\n",
    "    unloc = pickle.load(f)\n",
    "unloc.ccode.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unloc.groupby('loccode').ccode.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unloc[unloc.loccode == 'ZZZ']\n",
    "unloc.insert(2, 'citycode', unloc.ccode + unloc.loccode)\n",
    "#unloc['citycode'] = unloc.ccode + unloc.loccode\n",
    "unloc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_data_path = 'data/01_raw_data/'\n",
    "\n",
    "!ls {raw_data_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head {os.path.join(raw_data_path,'Index.csv ') }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for file in os.listdir(raw_data_path): \n",
    "    print('\\n', file)\n",
    "    !head {os.path.join(raw_data_path, file) }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rawdatainfo = pd.read_csv(joinpath('datamanager/overview_rawdata.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "citytemp = pd.read_csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datamanager import processing\n",
    "cleaner = getattr(processing, f'clean_{citytemp}')\n",
    "\n",
    "cleaner()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rawdatainfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "numdata = pd.DataFrame(columns=['location_name', 'location_type'])\n",
    "\n",
    "\n",
    "for idx, row in rawdatainfo.iterrows():\n",
    "    print(f'Processing file {filename}')\n",
    "\n",
    "    \n",
    "    #\n",
    "    cleaner = getattr(processing, f'clean_{row.dataset}')\n",
    "    \n",
    "    # read file\n",
    "    file_raw = pd.read_csv()\n",
    "    \n",
    "    # apply cleaning\n",
    "    file = cleaner(file_raw)\n",
    "    \n",
    "    # join \n",
    "    numdata.merge(how='outer')\n",
    "    print(idx, row)\n",
    "    #print(datafile.name[idx])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
